#!/bin/bash

#PBS -l nodes=1:ppn=8,walltime=40:00:00
#PBS -N filter_gatk_snps
#PBS -M cmb433@nyu.edu
#PBS -m abe
#PBS -e localhost:/scratch/cmb433/awash_ddrad/rad-seq-primate-read-only/${PBS_JOBNAME}.e${PBS_JOBID}.${PBS_ARRAYID}
#PBS -o localhost:/scratch/cmb433/awash_ddrad/rad-seq-primate-read-only/${PBS_JOBNAME}.o${PBS_JOBID}.${PBS_ARRAYID}

# ------------------------------------------------------------------------------
# Variables
# ------------------------------------------------------------------------------

working_dir=/scratch/cmb433/awash_ddrad/rad-seq-primate-read-only

module load jdk/1.7.0

# ------------------------------------------------------------------------------
# Run pipeline
# ------------------------------------------------------------------------------

cd $working_dir

GATK=~/exome_macaque/bin/GATK
GENOME_FA=genomes/papAnu2/papAnu2.fa

if [ "$PBS_ARRAYID" -eq 21 ]; then
	CHROM=X
else
	CHROM=$PBS_ARRAYID
fi

java -Xmx2g -jar ${GATK}/GenomeAnalysisTK.jar \
	-R ${GENOME_FA} \
	-T VariantFiltration \
	-o chr${CHROM}.flt.vcf \
	--variant chr${CHROM}.raw.snps.indels.vcf \
	--filterExpression "QD < 2.0" \
	--filterName "QDfilter" \
	--filterExpression "MQ < 40.0" \
	--filterName "MQfilter" \
	--filterExpression "FS > 60.0" \
	--filterName "FSfilter" \
	--filterExpression "HaplotypeScore > 13.0" \
	--filterName "HAPSCfilter" \
	--filterExpression "MQRankSum < -12.5" \
	--filterName "MQRSfilter" \
	--filterExpression "ReadPosRankSum < -8.0" \
	--filterName "RPRSfilter" \
	--missingValuesInExpressionsShouldEvaluateAsFailing	

# Select variants with "FILTER=PASS" and are SNPs:
java -jar ${GATK}/GenomeAnalysisTK.jar \
	-T SelectVariants \
	-R ${GENOME_FA} \
	--variant chr${CHROM}.flt.vcf \
	--select_expressions "vc.isNotFiltered() && vc.isSNP()" \
	-o chr${CHROM}.pass.snp.vcf

exit;
